# Now we need to add in the rest of the data from Firre databasehome <- here::here()filenames <-list.files(paste0(home,"/data/temp_data_fishing_firre"))raw_data<-NULLfor(i in1:length(filenames)) { dat <-read_excel(paste0(home,"/data/temp_data_fishing_firre/",filenames[i]))if(i ==1) names <-colnames(dat)if(i !=1) names(dat) <- names area <-substr(filenames[i], 1, 2)if(area =="SI") area <-substr(filenames[i], 1, 5)#if(area=="JM") area <- paste0(substr(filenames[i],1,2),substr(filenames[i],9,10))if(area =="JMT0") area <-substr(filenames[i], 1, 2) dat$area <- area raw_data <-data.frame(rbind(raw_data, dat))} # JMT0 is really JM_T10 and taken as the main temperature time series for JM# Here I get a warning when I convert to data from Year, Week, day-of-week. Could be due to different date standards (week starting with 0 instead of 1, which I think is the ISO way and how our data are coded. It's however not an option in as.Date, which is why i looked into `ISOweek2date`. But that throws an error for some of the dates. Hmm. A simple test however shows that as.Date returns the correct day (double checked). The warnings from as.Date and the error from ISOweek2date could be due to error in data entry. Will explore. # https://stackoverflow.com/questions/45549449/transform-year-week-to-date-object# https://www.r-bloggers.com/2013/08/date-formats-in-r/firre_dat <- raw_data |>mutate(group =ifelse(VkDag >100, "2", "1")) |>mutate(VkDag2 =ifelse(group =="1", paste0(0, VkDag), VkDag)) |># add zero before strings with length 2...separate(VkDag2, sep =2, into =c("week", 'day'), extra ='drop', remove =FALSE) |>mutate(week =as.numeric(week), # to get rid of the 0day =as.numeric(day)) |>mutate(date =as.Date(paste(Årtal, week, day, sep ="-"), "%Y-%U-%u")) |># mutate(week2 = paste0("W", week),# weekdate = paste(Årtal, week2, day, sep = "-")) #|> mutate(#date = ISOweek2date(weekdate),yday =yday(date),month =month(date)) |>rename(year = Årtal,stn_nr = Station,section_nr = Sektion) |>filter(!if_all(c(MedelFörTemperatur_i, MedelFörTemperatur_u), is.na)) |>mutate(MedelFörTemperatur_i =ifelse(is.na(MedelFörTemperatur_i), MedelFörTemperatur_u, MedelFörTemperatur_i), MedelFörTemperatur_u =ifelse(is.na(MedelFörTemperatur_u), MedelFörTemperatur_i, MedelFörTemperatur_u)) |>mutate(temp = (MedelFörTemperatur_i + MedelFörTemperatur_u) /2,db ="FIRRE",id =paste(year, area, sep ="_")) |>select(year, yday, month, day, date, VkDag, temp, area, stn_nr, section_nr, db, id) |>drop_na(date) |>mutate(date =as.character(date))#> Warning: There were 24 warnings in `mutate()`.#> The first warning was:#> ℹ In argument: `date = as.Date(paste(Årtal, week, day, sep = "-"),#> "%Y-%U-%u")`.#> Caused by warning in `strptime()`:#> ! (0-based) yday 369 in year 1972 is invalid#> ℹ Run `dplyr::last_dplyr_warnings()` to see the 23 remaining warnings.# Now we need to merge it with fish_datfish_dat <-bind_rows(firre_dat |>filter(!id %in% kul_dat$id), # remove duplicates kul_dat) |>mutate(source ="fishing")head(fish_dat)#> year yday month day date VkDag temp area stn_nr section_nr db#> 1 1991 239 8 2 1991-08-27 342 15.75 BS 1 1 FIRRE#> 2 1992 224 8 2 1992-08-11 322 16.65 BS 1 1 FIRRE#> 3 1992 226 8 4 1992-08-13 324 16.60 BS 1 1 FIRRE#> 4 1992 228 8 6 1992-08-15 326 15.85 BS 1 1 FIRRE#> 5 1994 266 9 5 1994-09-23 385 12.30 BS 1 1 FIRRE#> 6 1991 239 8 2 1991-08-27 342 15.65 BS 2 1 FIRRE#> id source#> 1 1991_BS fishing#> 2 1992_BS fishing#> 3 1992_BS fishing#> 4 1992_BS fishing#> 5 1994_BS fishing#> 6 1991_BS fishingfish_dat |>filter(area =="BT") |>group_by(source) |>summarise(min_yr =min(year))#> # A tibble: 1 × 2#> source min_yr#> <chr> <dbl>#> 1 fishing 1984# Plot data over yearfish_dat |>ggplot(aes(x = year, y = temp, color = yday)) +geom_point(size =0.25) +scale_color_viridis() +theme(plot.title =element_text(size =15, face ="bold")) +theme(axis.text.x =element_text(angle =90)) +theme(axis.text =element_text(size =12), axis.title =element_text(size =15)) +guides(color =guide_legend(override.aes =list(size =1))) +labs(x ="Year", y ="Temperature") +facet_wrap(~area) +NULL
Code
# Plot data over yeardayfish_dat |>ggplot(aes(x = yday, y = temp, color =factor(year))) +geom_point(size =0.25) +scale_color_viridis(discrete =TRUE) +theme(plot.title =element_text(size =15, face ="bold")) +theme(axis.text =element_text(size =12), axis.title =element_text(size =15)) +theme(axis.text.x =element_text(angle =90)) +guides(color =guide_legend(override.aes =list(size =1))) +labs(x ="Day-of-the-year", y ="Temperature") +facet_wrap(~area) +NULL
ERSST data
Code
# Longitude and latitude attributes for each areaarea <-c("BS", "BT", "FB", "FM", "HO", "JM", "MU", "RA", "SI_EK", "SI_HA", "TH", "VN")nareas <-length(area)lat <-c(60, 60.4, 60.3, 60.5, 63.7, 58, 59, 65.9, 57.3, 57.4, 56.1, 57.5)lon <-c(21.5, 18.1, 19.5, 18, 20.9, 16.8, 18.1, 22.3, 16.6, 16.7, 15.9, 16.9)area_attr <-data.frame(cbind(area = area, lat = lat, lon = lon)) |>mutate_at(c("lat", "lon"), as.numeric)# Download ERSST data - only run when update needed# sst_dat_all <- get_ersst_data(years=seq(1940,2022),data.dir=paste0(home,"/data"), latrange=c(55,66),lonrange=c(15,23))# SST based on ERSST data with relatively low spatial resolution (2x2 degrees)# Need to cover at least 2x2 grid area with even numbers for longitude/latitudesst_areas <-NULLlat_ranges <- lon_ranges <-list() # for testing onlyfor(a in1:nareas) { lat_range <-c(2*floor(area_attr$lat[a]/2), 2*floor(area_attr$lat[a]/2) +2) lon_range <-c(2*floor(area_attr$lon[a]/2), 2*floor(area_attr$lon[a]/2) +2) sst_area <-load_ersst_data(years =c(1940, 2022), data.dir =paste0(home, "/data"), ncfilename ="sst.mnmean.nc", latrange = lat_range, lonrange = lon_range) sst_area$area <- area_attr$area[a] sst_areas <-bind_rows(sst_areas, sst_area) lat_ranges[[a]] <- lat_range lon_ranges[[a]] <- lon_range}#> Joining with `by = join_by(lon_index)`#> Joining with `by = join_by(lat_index)`#> Joining with `by = join_by(time_index)`#> Rows: 3944 Columns: 7#> ── Column specification#> ──────────────────────────────────────────────────────── Delimiter: "," chr#> (1): month dbl (5): sst, lon, lat, time, year date (1): date#> ℹ Use `spec()` to retrieve the full column specification for this data. ℹ#> Specify the column types or set `show_col_types = FALSE` to quiet this message.#> `summarise()` has grouped output by 'year'. You can override using the#> `.groups` argument.#> Joining with `by = join_by(lon_index)`#> Joining with `by = join_by(lat_index)`#> Joining with `by = join_by(time_index)`#> Rows: 3944 Columns: 7#> ── Column specification#> ──────────────────────────────────────────────────────── Delimiter: "," chr#> (1): month dbl (5): sst, lon, lat, time, year date (1): date#> ℹ Use `spec()` to retrieve the full column specification for this data. ℹ#> Specify the column types or set `show_col_types = FALSE` to quiet this message.#> `summarise()` has grouped output by 'year'. You can override using the#> `.groups` argument.#> Joining with `by = join_by(lon_index)`#> Joining with `by = join_by(lat_index)`#> Joining with `by = join_by(time_index)`#> Rows: 3944 Columns: 7#> ── Column specification#> ──────────────────────────────────────────────────────── Delimiter: "," chr#> (1): month dbl (5): sst, lon, lat, time, year date (1): date#> ℹ Use `spec()` to retrieve the full column specification for this data. ℹ#> Specify the column types or set `show_col_types = FALSE` to quiet this message.#> `summarise()` has grouped output by 'year'. You can override using the#> `.groups` argument.#> Joining with `by = join_by(lon_index)`#> Joining with `by = join_by(lat_index)`#> Joining with `by = join_by(time_index)`#> Rows: 3944 Columns: 7#> ── Column specification#> ──────────────────────────────────────────────────────── Delimiter: "," chr#> (1): month dbl (5): sst, lon, lat, time, year date (1): date#> ℹ Use `spec()` to retrieve the full column specification for this data. ℹ#> Specify the column types or set `show_col_types = FALSE` to quiet this message.#> `summarise()` has grouped output by 'year'. You can override using the#> `.groups` argument.#> Joining with `by = join_by(lon_index)`#> Joining with `by = join_by(lat_index)`#> Joining with `by = join_by(time_index)`#> Rows: 3944 Columns: 7#> ── Column specification#> ──────────────────────────────────────────────────────── Delimiter: "," chr#> (1): month dbl (5): sst, lon, lat, time, year date (1): date#> ℹ Use `spec()` to retrieve the full column specification for this data. ℹ#> Specify the column types or set `show_col_types = FALSE` to quiet this message.#> `summarise()` has grouped output by 'year'. You can override using the#> `.groups` argument.#> Joining with `by = join_by(lon_index)`#> Joining with `by = join_by(lat_index)`#> Joining with `by = join_by(time_index)`#> Rows: 3944 Columns: 7#> ── Column specification#> ──────────────────────────────────────────────────────── Delimiter: "," chr#> (1): month dbl (5): sst, lon, lat, time, year date (1): date#> ℹ Use `spec()` to retrieve the full column specification for this data. ℹ#> Specify the column types or set `show_col_types = FALSE` to quiet this message.#> `summarise()` has grouped output by 'year'. You can override using the#> `.groups` argument.#> Joining with `by = join_by(lon_index)`#> Joining with `by = join_by(lat_index)`#> Joining with `by = join_by(time_index)`#> Rows: 3944 Columns: 7#> ── Column specification#> ──────────────────────────────────────────────────────── Delimiter: "," chr#> (1): month dbl (5): sst, lon, lat, time, year date (1): date#> ℹ Use `spec()` to retrieve the full column specification for this data. ℹ#> Specify the column types or set `show_col_types = FALSE` to quiet this message.#> `summarise()` has grouped output by 'year'. You can override using the#> `.groups` argument.#> Joining with `by = join_by(lon_index)`#> Joining with `by = join_by(lat_index)`#> Joining with `by = join_by(time_index)`#> Rows: 3944 Columns: 7#> ── Column specification#> ──────────────────────────────────────────────────────── Delimiter: "," chr#> (1): month dbl (5): sst, lon, lat, time, year date (1): date#> ℹ Use `spec()` to retrieve the full column specification for this data. ℹ#> Specify the column types or set `show_col_types = FALSE` to quiet this message.#> `summarise()` has grouped output by 'year'. You can override using the#> `.groups` argument.#> Joining with `by = join_by(lon_index)`#> Joining with `by = join_by(lat_index)`#> Joining with `by = join_by(time_index)`#> Rows: 3944 Columns: 7#> ── Column specification#> ──────────────────────────────────────────────────────── Delimiter: "," chr#> (1): month dbl (5): sst, lon, lat, time, year date (1): date#> ℹ Use `spec()` to retrieve the full column specification for this data. ℹ#> Specify the column types or set `show_col_types = FALSE` to quiet this message.#> `summarise()` has grouped output by 'year'. You can override using the#> `.groups` argument.#> Joining with `by = join_by(lon_index)`#> Joining with `by = join_by(lat_index)`#> Joining with `by = join_by(time_index)`#> Rows: 3944 Columns: 7#> ── Column specification#> ──────────────────────────────────────────────────────── Delimiter: "," chr#> (1): month dbl (5): sst, lon, lat, time, year date (1): date#> ℹ Use `spec()` to retrieve the full column specification for this data. ℹ#> Specify the column types or set `show_col_types = FALSE` to quiet this message.#> `summarise()` has grouped output by 'year'. You can override using the#> `.groups` argument.#> Joining with `by = join_by(lon_index)`#> Joining with `by = join_by(lat_index)`#> Joining with `by = join_by(time_index)`#> Rows: 3944 Columns: 7#> ── Column specification#> ──────────────────────────────────────────────────────── Delimiter: "," chr#> (1): month dbl (5): sst, lon, lat, time, year date (1): date#> ℹ Use `spec()` to retrieve the full column specification for this data. ℹ#> Specify the column types or set `show_col_types = FALSE` to quiet this message.#> `summarise()` has grouped output by 'year'. You can override using the#> `.groups` argument.#> Joining with `by = join_by(lon_index)`#> Joining with `by = join_by(lat_index)`#> Joining with `by = join_by(time_index)`#> Rows: 3944 Columns: 7#> ── Column specification#> ──────────────────────────────────────────────────────── Delimiter: "," chr#> (1): month dbl (5): sst, lon, lat, time, year date (1): date#> ℹ Use `spec()` to retrieve the full column specification for this data. ℹ#> Specify the column types or set `show_col_types = FALSE` to quiet this message.#> `summarise()` has grouped output by 'year'. You can override using the#> `.groups` argument.latranges <-data.frame(matrix(unlist(lat_ranges), ncol =2, byrow = T))lonranges <-data.frame(matrix(unlist(lon_ranges), ncol =2, byrow = T))# Plot SST by area in each monthsst_areas |>ggplot(aes(x = year, y = meanSST, group =as.factor(month), color =as.factor(month))) +geom_line() +scale_color_viridis(discrete =TRUE) +scale_x_continuous(breaks =seq(1940, 2020, 10)) +theme(plot.title =element_text(size =15, face ="bold")) +theme(axis.text.x =element_text(angle=90)) +theme(axis.text =element_text(size =12), axis.title =element_text(size =15)) +labs(x ="Year", y ="Mean SST", title ="Mean SST in each month by area", color ="Month") +facet_wrap(~area, scale ="free_y") +NULL
Code
sst_areas <- sst_areas |>mutate(date =paste(year, month, 15, sep ="-"),yday =yday(date),source ="errs",month =as.numeric(month)) |>rename(temp = meanSST)errs_dat <- sst_areas# Plot SST by month in each areaerrs_dat |>ggplot(aes(x = year, y = temp, group =as.factor(area), color =as.factor(area))) +geom_line() +scale_color_brewer(palette ="Set3") +scale_x_continuous(breaks =seq(1940, 2020, 10)) +theme(plot.title =element_text(size =15, face ="bold")) +theme(axis.text.x =element_text(angle =90)) +theme(axis.text =element_text(size =12), axis.title =element_text(size =15)) +labs(x ="Year", y ="Mean SST", title ="Mean SST in each month by area", color ="Area") +facet_wrap(~month, scale ="free_y") +NULL
Code
all_temp <-bind_rows(log_dat, errs_dat, fish_dat)# See if we can compare the mean temperature in a time of the year that have overlapall_temp |>filter(area =="FB") |>ggplot(aes(yday, fill = source)) +facet_wrap(~year, scales ="free_y") +geom_density(alpha =0.5, color =NA)
Code
# Plot some overlapping year and areasall_temp |>filter(area =="VN"& year >1995) |>ggplot(aes(yday, temp, color = source)) +geom_point() +facet_wrap(~year)
Code
all_temp |>filter(area =="JM"& year <1995& year >1975) |>ggplot(aes(yday, temp, color = source)) +geom_point() +facet_wrap(~year)
Code
all_temp |>filter(area =="FM"& year <2000& year >1980) |>ggplot(aes(yday, temp, color = source)) +geom_point() +facet_wrap(~year)
Code
# Some examples: OK match but not in BT which makes sense because the ERSS data isn't fine scale enoughall_temp |>filter(area =="FB"& year %in%c(1998:2001)) |>ggplot(aes(yday, temp, color = source)) +geom_point() +geom_line() +facet_wrap(~year)
Code
all_temp |>filter(area =="SI_EK"& year %in%c(1998:2001)) |>ggplot(aes(yday, temp, color = source)) +geom_point() +geom_line() +facet_wrap(~year)
Code
all_temp |>filter(area =="BT"& year %in%c(1998:2001)) |>ggplot(aes(yday, temp, color = source)) +geom_point() +geom_line() +facet_wrap(~year)
Code
# Take a close look at SI_EK 1995all_temp |>filter(source =="logger"& area =="SI_EK"& year ==1996) |>ggplot(aes(yday, temp)) +geom_point() +facet_grid(area~year)
Code
# Filter the row of very low values - broken logger?all_temp |>filter(source =="logger"& area =="SI_EK"& year ==1996) |>mutate(keep =ifelse(temp <6.54, "N", "Y")) |>ggplot(aes(yday, temp, color = keep)) +geom_point() +facet_grid(area~year)
Code
all_temp <- all_temp |>mutate(keep =ifelse(source =="logger"& area =="SI_EK"& year ==1996& temp <6.54, "N", "Y")) |>filter(keep =="Y") |>select(-keep)
Save data!
Code
# Save data for model fittingwrite_csv(all_temp, paste0(home, "/output/temp-data-for-fitting.csv"))